import re
import requests
# headers = {
#     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
# }
# url = 'https://book.douban.com/'
# res = requests.get(url, headers=headers)
# Work from a locally saved copy of the page ('douban.txt') rather than a
# live HTTP response; the commented-out request code above is the online
# alternative (its body would be `res.text`).
with open('douban.txt', mode='r', encoding='utf-8') as saved_page:
    text = saved_page.read()
content = text

# One match per book entry, capturing in order: link URL, title, author and
# year. DOTALL lets '.' cross newlines, because each entry's markup spans
# several lines. The adjacent literals below concatenate into one pattern.
pattern = re.compile(
    'div class="info".*?href="(.*?)".*?title="(.*?)".*?'
    '<span class="author">(.*?)</span>.*?'
    '<span class="year">(.*?)</span>.*?',
    re.DOTALL,
)
result = pattern.findall(content)

# Each match is a 4-tuple of the captured groups.
for url, name, author, data in result:
    # Trim only the leading/trailing whitespace around the author and year.
    # (Substituting every r'\s' with '' would also remove whitespace inside
    # the text, so it is not a drop-in replacement for strip().)
    author_clean = author.strip()
    year_clean = data.strip()
    print(url, name, author_clean, year_clean)